Hadley Wickham: grammar of graphics
Hans Rosling: GapMinder
Gapminder World - Wealth & Health of Nations
ggplot
suppressPackageStartupMessages({
library(dplyr)
library(ggplot2)
library(gapminder)
})
# Preview the data: auto-printing the gapminder table shows its dimensions,
# the column names/types and the first ten rows.
gapminder
## Source: local data frame [1,704 x 6]
##
## country continent year lifeExp pop gdpPercap
## (fctr) (fctr) (int) (dbl) (int) (dbl)
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
## 7 Afghanistan Asia 1982 39.854 12881816 978.0114
## 8 Afghanistan Asia 1987 40.822 13867957 852.3959
## 9 Afghanistan Asia 1992 41.674 16317921 649.3414
## 10 Afghanistan Asia 1997 41.763 22227415 635.3414
## .. ... ... ... ... ... ...
# Summarise every column to see the range of available data
# (counts for the factor columns, min/quartiles/mean/max for the numeric ones).
summary(gapminder)
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
# Data frame used by all plots below: 2007 rows only, plus population in millions
g <- gapminder %>%
  filter(year == 2007) %>%     # keep the most recent year
  mutate(pop_m = pop / 1e6)    # pop_m: population expressed in millions
# Base scatterplot for the most recent year: GDP per capita (x) vs life expectancy (y)
s <- ggplot(data = g, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
s
# Map point size to population (in millions), then redraw
s <- s + aes(size = pop_m)
s
# Map point colour to continent, then redraw
s <- s + aes(color = continent)
s
# Set the plot title and both axis labels in a single labs() call, then redraw
s <- s +
  labs(
    title = 'Health & Wealth of Nations for 2007',
    x = 'GDP per capita ($/year)',
    y = 'Life expectancy (years)'
  )
s
# Give the colour and size legends readable titles, then redraw
s <- s +
  scale_colour_discrete(name = 'Continent') +
  scale_size_continuous(name = 'Population (M)')
s
Your Turn
Now with country emissions datasets…
# Boxplot of life expectancy, one box per continent
b <- ggplot(data = g, mapping = aes(x = continent, y = lifeExp)) +
  geom_boxplot()
b
# Fill each box by continent so the colours match the scatterplot
b <- b + aes(fill = continent)
b
# Hide the legend (the x axis already names each continent),
# then set the title and axis labels in a single labs() call
b <- b +
  theme(legend.position = 'none') +
  labs(
    title = 'Life Expectancy by Continent for 2007',
    x = 'Continent',
    y = 'Life expectancy (years)'
  )
b
Your Turn: Make a similar plot but for gdpPercap. Be sure to update the plot’s aesthetic, axis label and title accordingly.
plotly
suppressPackageStartupMessages({
library(plotly) # install.packages('plotly')
})
# Interactive scatterplot: rebuild s with an extra key = country mapping,
# which is what shows up on rollover
s <- ggplot(
  data = g,
  mapping = aes(x = gdpPercap, y = lifeExp, key = country)
) +
  geom_point()
ggplotly(s)

# Interactive version of the boxplot b
ggplotly(b)
Your Turn: Expand the interactive scatterplot to include all the other bells and whistles of the previous plot (size, color, title, axis labels and legend titles) in one continuous chain of code, without reassigning s in between.
library(explodingboxplotR) # devtools::install_github('timelyportfolio/explodingboxplotR')

# Exploding boxplot of life expectancy: grouped and coloured by continent,
# with country passed as the label column
exploding_boxplot(
  g,
  y = 'lifeExp',
  group = 'continent',
  color = 'continent',
  label = 'country'
)
The googleVis package ports most of the Google charts functionality.
Every R chunk that outputs a googleVis plot must set the chunk option results='asis'; in addition, set op <- options(gvis.plot.tag='chart') once, before any googleVis plots.
suppressPackageStartupMessages({
  library(googleVis) # install.packages('googleVis')
})

# Set once before any googleVis plot; the previous option values are kept in op
op <- options(gvis.plot.tag = 'chart')

# Motion chart over the full gapminder data: one bubble per country,
# animated over year, coloured by continent and sized by population
m <- gvisMotionChart(
  data = gapminder,
  idvar = 'country',
  timevar = 'year',
  xvar = 'gdpPercap',
  yvar = 'lifeExp',
  colorvar = 'continent',
  sizevar = 'pop'
)
plot(m)
tmap
Thematic maps
library(tmap) # install.packages('tmap')

# Example datasets loaded by name: World and metro
data(World, metro)

# Growth in percent: change from pop2010 to pop2020, divided by ten times pop2010
metro$growth <- (metro$pop2020 - metro$pop2010) / (metro$pop2010 * 10) * 100

# Polygons coloured by income group, overlaid with bubbles for metro areas
# (bubble size = pop2010, bubble colour = growth in fixed classes)
m <- tm_shape(World) +
  tm_polygons(
    "income_grp",
    palette = "-Blues",
    contrast = 0.7,
    id = "name",
    title = "Income group"
  ) +
  tm_shape(metro) +
  tm_bubbles(
    "pop2010",
    col = "growth",
    border.col = "black",
    border.alpha = 0.5,
    style = "fixed",
    breaks = c(-Inf, seq(0, 6, by = 2), Inf),
    palette = "-RdYlBu",
    contrast = 1,
    title.size = "Metro population",
    title.col = "Growth rate (%)",
    id = "name"
  ) +
  tm_style_gray() +
  tm_format_World()

# Draw the map
m

# Same map passed to tmap_leaflet()
tmap_leaflet(m)